library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(magrittr)
library(ggplot2)
library(forcats)
library(stringr)
The csv can be found at https://catalog.data.gov/dataset/traffic-violations-56dda
# Load the traffic-violation data into `traffic`.
# Parsing the raw CSV takes several minutes, so the parsed table is cached as
# an RDS file on the first run and read back from that cache on later runs.
# NOTE(review): the cache is never refreshed automatically; delete
# MOCO_traffic.rds after downloading a newer Traffic_Violations.csv.
if (file.exists("MOCO_traffic.rds")) {
  traffic <- readRDS("MOCO_traffic.rds")
} else {
  traffic <- readr::read_csv("Traffic_Violations.csv")
  saveRDS(traffic, "MOCO_traffic.rds")
}
## Parsed with column specification:
## cols(
## .default = col_character(),
## `Time Of Stop` = col_time(format = ""),
## Latitude = col_double(),
## Longitude = col_double(),
## Year = col_double()
## )
## See spec(...) for full column specifications.
# Normalise the column names to snake_case: spaces become underscores and
# everything is lower-cased (e.g. "Time Of Stop" -> "time_of_stop").
names(traffic) %<>%
  str_replace_all(" ", "_") %>%
  tolower()

traffic %<>%
  mutate(
    # Parse the stop date (month/day/year text) and the stop time.
    date_of_stop = lubridate::mdy(date_of_stop),
    time_of_stop = lubridate::hms(time_of_stop),
    # Turn the "Yes"/"No" text flags into logical TRUE/FALSE in one pass.
    # Any value other than "Yes" becomes FALSE; NA stays NA.
    across(
      c(
        accident, belts, personal_injury, property_damage, fatal,
        commercial_license, hazmat, commercial_vehicle, alcohol,
        work_zone, contributed_to_accident
      ),
      function(flag) flag == "Yes"
    )
  ) %>%
  # Drop the geolocation column, which is not used.
  select(-geolocation) %>%
  # Add the year, month, and day of the violation. month_year is the first
  # day of the stop's month (make_date defaults the day to 1).
  mutate(
    year_of_stop = lubridate::year(date_of_stop),
    month_of_stop = lubridate::month(date_of_stop),
    month_year = lubridate::make_date(
      year = year_of_stop,
      month = month_of_stop
    ),
    day_of_stop = lubridate::day(date_of_stop)
  ) %>%
  # Remove the current year so that only complete years are included.
  # NOTE(review): "current" is taken from the system clock at run time, so the
  # rows removed depend on when the script is run relative to the download.
  # Rows whose year_of_stop is NA are removed as well.
  filter(year_of_stop != lubridate::year(Sys.Date()))

knitr::kable(head(traffic))
| date_of_stop | time_of_stop | agency | subagency | description | location | latitude | longitude | accident | belts | personal_injury | property_damage | fatal | commercial_license | hazmat | commercial_vehicle | alcohol | work_zone | state | vehicletype | year | make | model | color | violation_type | charge | article | contributed_to_accident | race | gender | driver_city | driver_state | dl_state | arrest_type | year_of_stop | month_of_stop | month_year | day_of_stop |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2013-09-24 | 17H 11M 0S | MCP | 3rd district, Silver Spring | DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGISTRATION | 8804 FLOWER AVE | NA | NA | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | MD | 02 - Automobile | 2008 | FORD | 4S | BLACK | Citation | 13-401(h) | Transportation Article | FALSE | BLACK | M | TAKOMA PARK | MD | MD | A - Marked Patrol | 2013 | 9 | 2013-09-01 | 24 |
| 2017-08-29 | 10H 19M 0S | MCP | 2nd district, Bethesda | DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC CONTROL DEVICE INSTRUCTIONS | WISCONSIN AVE@ ELM ST | 38.98172 | -77.09276 | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | VA | 02 - Automobile | 2001 | TOYOTA | COROLLA | GREEN | Citation | 21-201(a1) | Transportation Article | FALSE | WHITE | F | FAIRFAX STATION | VA | VA | A - Marked Patrol | 2017 | 8 | 2017-08-01 | 29 |
| 2014-12-01 | 12H 52M 0S | MCP | 6th district, Gaithersburg / Montgomery Village | FAILURE STOP AND YIELD AT THRU HWY | CHRISTOPHER AVE/MONTGOMERY VILLAGE AVE | 39.16289 | -77.22909 | FALSE | FALSE | FALSE | TRUE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | MD | 02 - Automobile | 2001 | HONDA | ACCORD | SILVER | Citation | 21-403(b) | Transportation Article | FALSE | BLACK | F | UPPER MARLBORO | MD | MD | A - Marked Patrol | 2014 | 12 | 2014-12-01 | 1 |
| 2017-08-29 | 9H 22M 0S | MCP | 3rd district, Silver Spring | FAILURE YIELD RIGHT OF WAY ON U TURN | CHERRY HILL RD./CALVERTON BLVD. | 39.05698 | -76.95463 | FALSE | FALSE | FALSE | TRUE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | MD | 02 - Automobile | 1998 | DODG | DAKOTA | WHITE | Citation | 21-402(b) | Transportation Article | FALSE | BLACK | M | FORT WASHINGTON | MD | MD | A - Marked Patrol | 2017 | 8 | 2017-08-01 | 29 |
| 2017-08-28 | 23H 41M 0S | MCP | 6th district, Gaithersburg / Montgomery Village | FAILURE OF DR. TO MAKE LANE CHANGE TO AVAIL. LANE NOT IMMED. ADJ. TO STOPPED EMERG. VEH, | 355 @ SOUTH WESTLAND DRIVE | NA | NA | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | MD | 02 - Automobile | 2015 | MINI COOPER | 2S | WHITE | Citation | 21-405(e1) | Transportation Article | FALSE | WHITE | M | GAITHERSBURG | MD | MD | A - Marked Patrol | 2017 | 8 | 2017-08-01 | 28 |
| 2013-08-27 | 55M 0S | MCP | 2nd district, Bethesda | NEGLIGENT DRIVING VEHICLE IN CARELESS AND IMPRUDENT MANNER ENDANGERING PROPERTY, LIFE AND PERSON | CONNECTICUT/CHEVY CHASE LAKE | NA | NA | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | FALSE | MD | 02 - Automobile | 2013 | HYUNDAI | ELANTRA | GRAY | Citation | 21-901.1(b) | Transportation Article | FALSE | WHITE | F | SILVER SPRING | MD | MD | A - Marked Patrol | 2013 | 8 | 2013-08-01 | 27 |
# Print the structure of the cleaned table (column names, types, first values).
str(traffic)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 1437177 obs. of 38 variables:
## $ date_of_stop : Date, format: "2013-09-24" "2017-08-29" ...
## $ time_of_stop :Formal class 'Period' [package "lubridate"] with 6 slots
## .. ..@ .Data : num 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ year : num 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ month : num 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ day : num 0 0 0 0 0 0 0 0 0 0 ...
## .. ..@ hour : num 17 10 12 9 23 0 13 0 23 23 ...
## .. ..@ minute: num 11 19 52 22 41 55 23 38 41 41 ...
## $ agency : chr "MCP" "MCP" "MCP" "MCP" ...
## $ subagency : chr "3rd district, Silver Spring" "2nd district, Bethesda" "6th district, Gaithersburg / Montgomery Village" "3rd district, Silver Spring" ...
## $ description : chr "DRIVING VEHICLE ON HIGHWAY WITH SUSPENDED REGISTRATION" "DRIVER FAILURE TO OBEY PROPERLY PLACED TRAFFIC CONTROL DEVICE INSTRUCTIONS" "FAILURE STOP AND YIELD AT THRU HWY" "FAILURE YIELD RIGHT OF WAY ON U TURN" ...
## $ location : chr "8804 FLOWER AVE" "WISCONSIN AVE@ ELM ST" "CHRISTOPHER AVE/MONTGOMERY VILLAGE AVE" "CHERRY HILL RD./CALVERTON BLVD." ...
## $ latitude : num NA 39 39.2 39.1 NA ...
## $ longitude : num NA -77.1 -77.2 -77 NA ...
## $ accident : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ belts : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ personal_injury : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ property_damage : logi FALSE FALSE TRUE TRUE FALSE FALSE ...
## $ fatal : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ commercial_license : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ hazmat : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ commercial_vehicle : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ alcohol : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ work_zone : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ state : chr "MD" "VA" "MD" "MD" ...
## $ vehicletype : chr "02 - Automobile" "02 - Automobile" "02 - Automobile" "02 - Automobile" ...
## $ year : num 2008 2001 2001 1998 2015 ...
## $ make : chr "FORD" "TOYOTA" "HONDA" "DODG" ...
## $ model : chr "4S" "COROLLA" "ACCORD" "DAKOTA" ...
## $ color : chr "BLACK" "GREEN" "SILVER" "WHITE" ...
## $ violation_type : chr "Citation" "Citation" "Citation" "Citation" ...
## $ charge : chr "13-401(h)" "21-201(a1)" "21-403(b)" "21-402(b)" ...
## $ article : chr "Transportation Article" "Transportation Article" "Transportation Article" "Transportation Article" ...
## $ contributed_to_accident: logi FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ race : chr "BLACK" "WHITE" "BLACK" "BLACK" ...
## $ gender : chr "M" "F" "F" "M" ...
## $ driver_city : chr "TAKOMA PARK" "FAIRFAX STATION" "UPPER MARLBORO" "FORT WASHINGTON" ...
## $ driver_state : chr "MD" "VA" "MD" "MD" ...
## $ dl_state : chr "MD" "VA" "MD" "MD" ...
## $ arrest_type : chr "A - Marked Patrol" "A - Marked Patrol" "A - Marked Patrol" "A - Marked Patrol" ...
## $ year_of_stop : num 2013 2017 2014 2017 2017 ...
## $ month_of_stop : num 9 8 12 8 8 8 10 4 8 8 ...
## $ month_year : Date, format: "2013-09-01" "2017-08-01" ...
## $ day_of_stop : int 24 29 1 29 28 27 8 24 28 28 ...
# Bar chart of the number of recorded violations in each year of stop.
ggplot(traffic, aes(x = year_of_stop)) +
  geom_bar() +
  labs(
    title = "Traffic Violations by Year",
    x = "Year",
    y = "Violations"
  )
# One row per calendar month with the number of stops in that month.
# month_of_stop (1-12) is re-derived from month_year and only drives the
# bar fill colour.
plot_data <- traffic %>%
  count(month_year, name = "number_of_stops") %>%
  mutate(month_of_stop = lubridate::month(month_year))

# Build the monthly column chart, then hand it to plotly for interactivity.
# The parentheses are required: %>% binds tighter than +.
(
  ggplot(data = plot_data) +
    geom_col(aes(x = month_year, y = number_of_stops, fill = month_of_stop)) +
    theme(legend.position = "none") +
    labs(
      title = "Traffic Violations over Time",
      x = "Time",
      y = "Number of Stops"
    )
) %>%
  plotly::ggplotly()
# Express each stop as a fraction of its month: seconds elapsed since the
# start of the month divided by the total seconds in that month. The fraction
# is then snapped down to the lower edge of one of 30 equal-width bins
# (all.inside = TRUE keeps the bin index within 1..30).
plot_data <- traffic %>%
  mutate(
    point_in_month = (
      lubridate::period_to_seconds(lubridate::days(day_of_stop - 1)) +
        lubridate::period_to_seconds(time_of_stop)
    ) / lubridate::period_to_seconds(
      lubridate::days(lubridate::days_in_month(date_of_stop))
    ),
    point_in_month = (
      findInterval(point_in_month, (0:30) / 30, all.inside = TRUE) - 1
    ) / 30
  )

# Bar chart of violations per bin, to look for within-month patterns.
ggplot(plot_data, aes(x = point_in_month)) +
  geom_bar() +
  labs(
    title = "Are Violations More Likely at Certain Times of a Month",
    x = "Time of Month",
    y = "Violations"
  )
# Build clock_order: the 1440 12-hour clock labels for every minute of the
# day, in chronological order ("12:00AM", "12:01AM", ..., "11:59PM"), so that
# clock_order[m + 1] is the label for minute m after midnight.
clock_hours <- c(12, 1:11)
clock_minutes <- c(paste0(0, 0:9), 10:59)
clock_am_pm <- c("AM", "PM")

# Vectorised instead of three nested loops: the suffix varies slowest, then
# the hour, and the minute varies fastest.
clock_order <- paste0(
  rep(
    rep(clock_hours, each = length(clock_minutes)),
    times = length(clock_am_pm)
  ),
  ":",
  rep(clock_minutes, times = length(clock_hours) * length(clock_am_pm)),
  rep(clock_am_pm, each = length(clock_hours) * length(clock_minutes))
)
# Time of each stop expressed as minutes elapsed since midnight.
plot_data <- traffic %>%
  mutate(minute = lubridate::period_to_seconds(time_of_stop) / 60)

# Bar chart of stops per minute of the day, with one axis label per hour.
# The `text` aesthetic looks up the clock label for each minute; ggplot2
# itself warns that it is an unknown aesthetic. Presumably it is there for
# plotly's hover text - TODO confirm.
(
  ggplot(data = plot_data) +
    geom_bar(aes(x = minute, text = clock_order[minute + 1])) +
    labs(
      title = "Number of Stops by Time of Day",
      x = "Time",
      y = "Number of Stops"
    ) +
    scale_x_continuous(
      breaks = (0:23) * 60,
      labels = c(
        "12:00AM", paste0(1:11, ":00AM"),
        "12:00PM", paste0(1:11, ":00PM")
      )
    ) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1))
) %>%
  plotly::ggplotly()
## Warning: Ignoring unknown aesthetics: text
# Bounding box for Montgomery County: the furthest latitude/longitude in each
# direction, based on Google Maps.
south <- 38.92
north <- 39.36
east <- -76.88
west <- -77.55

# Scatter plot of stop coordinates strictly inside the bounding box. Rows with
# missing coordinates are dropped by the filter.
traffic %>%
  filter(
    latitude > south,
    latitude < north,
    longitude > west,
    longitude < east
  ) %>%
  ggplot() +
  geom_point(aes(x = longitude, y = latitude), alpha = .2, size = .1) +
  ggtitle("Where do Violations Occur?")
For this exam, I worked with the data set of traffic violations in Montgomery County, Maryland. The dataset was found at https://catalog.data.gov/dataset/traffic-violations-56dda. I dropped the data from 2019, which is only partially complete. The data was mostly clean from the site, but I converted the dates and times using the lubridate package and converted the character Yes/No values into boolean TRUE/FALSE values. I chose this data set because I thought that it would be interesting to see if there are any trends in traffic enforcement/violations in the county. Overall, I didn’t find too much that I would consider to be surprising. The peak in violations around midnight isn’t unexpected. It is likely explained by an increase in drunk drivers in addition to the violations that would be regularly committed at any given time.
I tried a number of visualizations. First I checked the number of violations recorded each year. The number of violations increased each year between 2012 and 2015, but has since leveled out and is now slightly down. Next I made the same graph, but upped the resolution to monthly. It showed mostly the same thing, but also revealed that there were a few months with random spikes in violations. I couldn’t see any pattern in these months, however. The next graph breaks down the violations by the point in the month at which they occurred. The goal of this graph was to see if monthly quotas were used that may cause an increase in violations at certain points in the month as officers try to meet the quota. The graph showed that violations were consistent at every point of the month, with no significant spikes or dips. Next, I looked at the time of day at which violations were issued. This graph showed a significant variation in the rate at which violations were issued depending on the time of day. Significantly more violations are issued at night, peaking around midnight. The fewest violations are issued around 5am, likely because far fewer drivers are on the road. Finally, I created a scatter plot based on the longitude and latitude of the violations. This more or less created a map of the county’s most traveled roads. I had to filter out some of the longitude and latitude data which was well outside of the county’s boundaries. There were a handful of data points that had longitudes and latitudes which indicated that the violation occurred hundreds of miles away from Montgomery County, including a few in the middle of the Atlantic Ocean. I found this graph to be pretty interesting: without using any special geospatial graphing, the county’s boundaries are pretty clear, and the locations of the bigger cities and towns are quite visible.